#Packages

library(dplyr)      #data manipulation
library(stringr)    #string manipulation
library(tidyr)      #data tidying
library(forcats)    #working with factors
library(ggplot2)    #basic visualization
library(modeltime)  #time series modelling
library(tidymodels) #modelling framework used by modeltime
library(timetk)     #data wrangling and visualization of time series
library(lubridate)  #working with dates and datetimes
library(tidyverse)  #data wrangling (attaches dplyr, ggplot2, tidyr, readr, ...)
library(readr)      #reading csv files

#Open the files

sales <- read_csv("sales.csv")
products <- read_csv("products.csv")
stores <- read_csv("stores.csv")

#Join all of the files and drop the unnecessary columns

Sales<-sales %>%
  left_join(products, by="Product_ID")%>%
  left_join(stores, by="Store_ID")
Sales<-Sales[-c(1,3,4,13)] #drop Sale_ID, Store_ID, Product_ID, Store_Open_Date
Sales
## # A tibble: 829,262 x 9
##    Date       Units Product_Name     Product_Category Product_Cost Product_Price
##    <date>     <dbl> <chr>            <chr>            <chr>        <chr>        
##  1 2017-01-01     1 Chutes & Ladders Games            $9.99        $12.99       
##  2 2017-01-01     1 Action Figure    Toys             $9.99        $15.99       
##  3 2017-01-01     1 Deck Of Cards    Games            $3.99        $6.99        
##  4 2017-01-01     1 Dart Gun         Sports & Outdoo~ $11.99       $15.99       
##  5 2017-01-01     1 Lego Bricks      Toys             $34.99       $39.99       
##  6 2017-01-01     1 Splash Balls     Sports & Outdoo~ $7.99        $8.99        
##  7 2017-01-01     1 Dart Gun         Sports & Outdoo~ $11.99       $15.99       
##  8 2017-01-01     1 Animal Figures   Toys             $9.99        $12.99       
##  9 2017-01-01     1 Mini Ping Pong ~ Sports & Outdoo~ $6.99        $9.99        
## 10 2017-01-01     1 Deck Of Cards    Games            $3.99        $6.99        
## # ... with 829,252 more rows, and 3 more variables: Store_Name <chr>,
## #   Store_City <chr>, Store_Location <chr>

#Remove the "$" symbol from Product_Cost and Product_Price, convert them to numeric, and add Total_Cost and Total_Price columns

Sales<-Sales%>%
  mutate(Product_Cost=as.numeric(str_replace(Product_Cost,"\\$","")),
         Product_Price=as.numeric(str_replace(Product_Price,"\\$","")),
         Total_Cost=Units*Product_Cost,
         Total_Price=Units*Product_Price)
Sales
## # A tibble: 829,262 x 11
##    Date       Units Product_Name     Product_Category Product_Cost Product_Price
##    <date>     <dbl> <chr>            <chr>                   <dbl>         <dbl>
##  1 2017-01-01     1 Chutes & Ladders Games                    9.99         13.0 
##  2 2017-01-01     1 Action Figure    Toys                     9.99         16.0 
##  3 2017-01-01     1 Deck Of Cards    Games                    3.99          6.99
##  4 2017-01-01     1 Dart Gun         Sports & Outdoo~        12.0          16.0 
##  5 2017-01-01     1 Lego Bricks      Toys                    35.0          40.0 
##  6 2017-01-01     1 Splash Balls     Sports & Outdoo~         7.99          8.99
##  7 2017-01-01     1 Dart Gun         Sports & Outdoo~        12.0          16.0 
##  8 2017-01-01     1 Animal Figures   Toys                     9.99         13.0 
##  9 2017-01-01     1 Mini Ping Pong ~ Sports & Outdoo~         6.99          9.99
## 10 2017-01-01     1 Deck Of Cards    Games                    3.99          6.99
## # ... with 829,252 more rows, and 5 more variables: Store_Name <chr>,
## #   Store_City <chr>, Store_Location <chr>, Total_Cost <dbl>, Total_Price <dbl>
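
As an aside, the str_replace() + as.numeric() cleaning above can be collapsed into a single step with readr::parse_number(), which drops the currency symbol while parsing. A minimal sketch of the equivalent call (shown commented out, since the columns are already numeric at this point):

#Sales<-Sales%>%
#  mutate(Product_Cost=parse_number(Product_Cost),
#         Product_Price=parse_number(Product_Price))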

#Extract the day and month from the Date column into new columns, and reorder the columns for readability

Sales<-Sales %>%
  mutate(Day = lubridate::day(Date),
         Month = month.name[lubridate::month(Date)]) #full month name as character

Sales<-Sales[, c(1,12,13,2,3,4,5,6,10,11,7,8,9)] #reorder columns by position
Sales
## # A tibble: 829,262 x 13
##    Date         Day Month   Units Product_Name     Product_Category Product_Cost
##    <date>     <int> <chr>   <dbl> <chr>            <chr>                   <dbl>
##  1 2017-01-01     1 January     1 Chutes & Ladders Games                    9.99
##  2 2017-01-01     1 January     1 Action Figure    Toys                     9.99
##  3 2017-01-01     1 January     1 Deck Of Cards    Games                    3.99
##  4 2017-01-01     1 January     1 Dart Gun         Sports & Outdoo~        12.0 
##  5 2017-01-01     1 January     1 Lego Bricks      Toys                    35.0 
##  6 2017-01-01     1 January     1 Splash Balls     Sports & Outdoo~         7.99
##  7 2017-01-01     1 January     1 Dart Gun         Sports & Outdoo~        12.0 
##  8 2017-01-01     1 January     1 Animal Figures   Toys                     9.99
##  9 2017-01-01     1 January     1 Mini Ping Pong ~ Sports & Outdoo~         6.99
## 10 2017-01-01     1 January     1 Deck Of Cards    Games                    3.99
## # ... with 829,252 more rows, and 6 more variables: Product_Price <dbl>,
## #   Total_Cost <dbl>, Total_Price <dbl>, Store_Name <chr>, Store_City <chr>,
## #   Store_Location <chr>
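
Positional indexing like c(1,12,13,...) breaks silently if upstream columns change; dplyr::relocate() expresses the same reorder by name. A sketch of an equivalent call (assuming dplyr >= 1.0, shown commented out):

#Sales<-Sales%>%
#  relocate(Day, Month, .after = Date)%>%
#  relocate(Total_Cost, Total_Price, .after = Product_Price)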

#Check for missing values

colSums(is.na(Sales))
##             Date              Day            Month            Units 
##                0                0                0                0 
##     Product_Name Product_Category     Product_Cost    Product_Price 
##                0                0                0                0 
##       Total_Cost      Total_Price       Store_Name       Store_City 
##                0                0                0                0 
##   Store_Location 
##                0

#Let's look at total sales for each day of the month

Sales_day<-Sales%>%
  group_by(Day)%>%
  summarise(Total.Sales=sum(Total_Price))%>%
  arrange(desc(Total.Sales))

ggplot(data=Sales_day,aes(
  x=reorder(Day,Total.Sales),
  y=Total.Sales))+
geom_col(aes(fill=Total.Sales),show.legend = FALSE)+
theme_bw()+
theme(axis.text=element_text(size=10),
      axis.title=element_text(size=12, colour="black"))+
labs(title = "Total Sales Each Day",
     subtitle = "January 2017 - September 2018",
     x = "Day",
     y = "Total Sales")

Days 1, 15, and 30, the first, middle, and last dates of the month, have the highest sales. Coincidence? Nobody knows. Day 31 is the lowest, simply because not every month has a 31st day.
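
A quick numeric confirmation of that reading, using the Sales_day table built above (a sketch):

Sales_day%>%slice_max(Total.Sales, n = 3) #three highest-selling days
Sales_day%>%slice_min(Total.Sales, n = 1) #lowest-selling day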

Sales%>%
  group_by(Day, Product_Category)%>%
  summarise(Total.Price = sum(Total_Price))%>%
  
  ggplot(aes(
    x=Day,
    y=Total.Price,
    fill=Product_Category))+
  geom_col(position = "stack")+
  theme_bw()+
  theme(legend.position = "bottom",
        legend.title = element_blank())+
  scale_x_continuous(breaks = 1:31)+
  labs(title = "Sales Composition Each Day Based on Product Category",
       subtitle = "January 2017 - September 2018",
       x = "Day",
       y = "Total Sales")
## `summarise()` has grouped output by 'Day'. You can override using the `.groups` argument.

Sales%>%
  group_by(Day, Product_Category)%>%
  summarise(count=sum(Units))%>%
  
  ggplot(aes(
    x=Day,
    y=count,
    fill=Product_Category))+
  geom_col(position = "stack")+
  theme_bw()+
  theme(legend.position = "bottom",
        legend.title = element_blank())+
  scale_x_continuous(breaks = 1:31)+
  labs(title = "Total Unit Sold Composition Each Day Based on Product Category",
       subtitle = "January 2017 - September 2018",
       x = "Day",
       y = "Total Unit")
## `summarise()` has grouped output by 'Day'. You can override using the `.groups` argument.

#Let's look at total sales for each month

Sales_month<-Sales%>%
  group_by(Month)%>%
  summarise(Total.Sales=sum(Total_Price))%>%
  arrange(desc(Total.Sales))

ggplot(data=Sales_month,aes(
  x=reorder(Month,Total.Sales),
  y=Total.Sales,
  label=Total.Sales))+
  geom_col(aes(fill=Total.Sales),show.legend = FALSE)+
  coord_flip()+
  theme_bw()+
  theme(axis.text=element_text(size=10),
        axis.title=element_text(size=12, colour="black"))+
  geom_label(aes(fill=Total.Sales),
             colour="white",
             size=4,
             show.legend = FALSE,
             position = position_stack(0.9))+
  labs(title = "Total Sales Each Month",
       subtitle = "January 2017 - September 2018",
       x = "Month",
       y = "Total Sales")

April has the highest sales and October the lowest. The Q4 months only look weak because the data runs to September 2018, so Q4 appears just once (in 2017). Excluding Q4, Q2 (April, May, June) has the highest sales.
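
To make the quarter comparison explicit, sales can be aggregated by quarter directly (a sketch; remember Q4 here covers only 2017):

Sales%>%
  mutate(Quarter=paste0("Q",lubridate::quarter(Date)))%>%
  group_by(Quarter)%>%
  summarise(Total.Sales=sum(Total_Price))%>%
  arrange(desc(Total.Sales))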

Sales%>%
  group_by(Month, Product_Category)%>%
  summarise(Total.Price = sum(Total_Price))%>%
  
  ggplot(aes(
    x=fct_rev(factor(Month,levels=month.name)),
    y=Total.Price,
    fill=Product_Category))+
  geom_col(position = "stack")+
  theme_bw()+
  coord_flip()+
  theme(legend.position = "bottom",
        legend.title = element_blank())+
  labs(title = "Sales Composition Each Month Based on Product Category",
       subtitle = "Sales From January 2017 - September 2018",
       x = "Month",
       y = "Total Sales")
## `summarise()` has grouped output by 'Month'. You can override using the `.groups` argument.

Sales%>%
  group_by(Month, Product_Category)%>%
  summarise(count=sum(Units))%>%
  
  ggplot(aes(
    x=fct_rev(factor(Month,levels=month.name)),
    y=count,
    fill=Product_Category))+
  geom_col(position = "stack")+
  theme_bw()+
  coord_flip()+
  theme(legend.position = "bottom",
        legend.title = element_blank())+
  labs(title = "Total Unit Sold Composition Each Month Based on Product Category",
       subtitle = "January 2017 - September 2018",
       x = "Month",
       y = "Total Unit")
## `summarise()` has grouped output by 'Month'. You can override using the `.groups` argument.

#Let's check the top 10 products

Sales%>%
  group_by(Product_Name)%>%
  summarise(Total.Price=sum(Total_Price))%>%
  arrange(desc(Total.Price))%>%
  head(10)%>%

ggplot(aes(
  x=reorder(Product_Name,Total.Price), 
  y = Total.Price, 
  label = Total.Price))+
geom_col(aes(fill = Total.Price), show.legend = FALSE)+
coord_flip()+
theme_bw()+
theme(axis.text = element_text(size = 10), 
      axis.title = element_text(size = 12, colour = "black"))+
geom_label(aes(fill = Total.Price),
           colour = "white",
           size = 4,
           show.legend = FALSE,
           position = position_stack(0.9))+
labs(title = "Total Sales Based on Product Names",
     subtitle = "Top 10 Product From January 2017 - September 2018",
     x = "Product Names",
     y = "Total Sales")

Lego Bricks has the highest total sales.

#Now let's see which stores made the highest sales

Sales%>%
  group_by(Store_Name)%>%
  summarise(Total.Price=sum(Total_Price))%>%
  arrange(desc(Total.Price)) %>%
  head(10)%>%


ggplot(aes(
  x=reorder(Store_Name,Total.Price), 
  y = Total.Price, 
  label = Total.Price))+
geom_col(aes(fill = Total.Price), show.legend = FALSE)+
coord_flip()+
theme_bw()+
theme(axis.text = element_text(size = 8), 
      axis.title = element_text(size = 10, colour = "black"))+
geom_label(aes(fill = Total.Price),
           colour = "white",
           size = 4,
           show.legend = FALSE,
           position = position_stack(0.9))+
labs(title = "Total Sales Based on Stores Name",
     subtitle = "Top 10 Stores From January 2017 - September 2018",
     x = "Store Names",
     y = "Total Sales")

Maven Toys Ciudad de Mexico 1 has the highest sales.

#Now let's look at sales by store location

Sales%>%
  group_by(Store_Location)%>%
  summarise(Total.Price=sum(Total_Price))%>%
  arrange(desc(Total.Price))%>%
  
ggplot(aes(
  x=reorder(Store_Location,Total.Price), 
  y =Total.Price, 
  label =round(Total.Price, digits = 2)))+
geom_col(aes(fill = Total.Price), show.legend = FALSE)+
coord_flip()+
theme_bw()+
theme(axis.text = element_text(size = 10), 
      axis.title = element_text(size = 12, colour = "black"))+
geom_label(aes(fill = Total.Price),
           colour = "white",
           size = 5,
           show.legend = FALSE,
           position = position_stack(.9))+
labs(title = "Store Location Total Sales",
     subtitle = "January 2017 - September 2018",
     x = "Location",
     y = "Total Sales")

Downtown locations clearly dominate total sales.
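
To quantify how dominant, a share column puts each location on a common scale (a sketch):

Sales%>%
  group_by(Store_Location)%>%
  summarise(Total.Sales=sum(Total_Price))%>%
  mutate(Share=round(100*Total.Sales/sum(Total.Sales),1))%>%
  arrange(desc(Share))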

#Percentage of sales by product category within each location

Sales%>%
  select(Store_Location, Product_Category, Total_Price)%>%
  group_by(Store_Location, Product_Category)%>%
  summarise(Total.Price = sum(Total_Price))%>%
  mutate(Percent = round(Total.Price / sum(Total.Price)*100 , digits = 2))%>%
  
ggplot(aes(
  x=Store_Location,
  y=Percent,
  fill=Product_Category))+
geom_col()+
geom_text(aes(label = paste(Percent,"%"), y = Percent), 
            position = position_stack(vjust = 0.5))+
coord_flip()+
theme_bw()+
theme(legend.position = "bottom",
      legend.title = element_blank())+

labs(title = "Sales Composition in Store Location Based on Product Category",
     subtitle = "January 2017 - September 2018",
     x = "Location",
     y = "Percentage")
## `summarise()` has grouped output by 'Store_Location'. You can override using the `.groups` argument.

Sales%>%
  select(Store_Location, Product_Category, Total_Price)%>%
  group_by(Store_Location, Product_Category)%>%
  summarise(Total.Price = sum(Total_Price))%>%

ggplot(aes(
  x=reorder(Store_Location,Total.Price),
  y=Total.Price,
  fill=Product_Category))+
geom_col(position = "stack")+
coord_flip()+
theme_bw()+
theme(legend.position = "bottom",
      legend.title = element_blank())+
labs(title = "Sales Composition in Store Location Based on Product Category",
     subtitle = "January 2017 - September 2018",
     x = "Location",
     y = "Total Sales")
## `summarise()` has grouped output by 'Store_Location'. You can override using the `.groups` argument.

Sales%>%
  select(Store_Location, Product_Category, Units)%>%
  group_by(Store_Location, Product_Category)%>%
  summarise(count=sum(Units))%>%

ggplot(aes(
  x=reorder(Store_Location,count),
  y=count,
  fill=Product_Category))+
geom_col(position = "stack")+
coord_flip()+
theme_bw()+
theme(legend.position = "bottom",
      legend.title = element_blank())+
labs(title = "Total Unit Sold Composition in Store Location Based on Product Category",
     subtitle = "January 2017 - September 2018",
     x = "Location",
     y = "Total Unit")
## `summarise()` has grouped output by 'Store_Location'. You can override using the `.groups` argument.

#Experiment: units sold by day of the week

Sales %>%
  mutate(DayName=lubridate::wday(Date, label = TRUE))%>%
  group_by(DayName,Product_Category)%>%
  summarise(count=sum(Units)) %>%

ggplot(aes(
  x=reorder(DayName,count),
  y=count,
  fill=Product_Category))+
geom_col(position = "stack")+
coord_flip()+
theme_bw()+
theme(legend.position = "bottom",
      legend.title = element_blank())+
labs(title = "Total Unit Sold Composition Based on Product Category",
     subtitle = "January 2017 - September 2018",
     x = "Product Category",
     y = "Total Unit")
## `summarise()` has grouped output by 'DayName'. You can override using the `.groups` argument.

#Experiment: units sold by day of the week, per store location

Sales %>%
  mutate(DayName=lubridate::wday(Date, label = TRUE))%>%
  group_by(DayName,Product_Category, Store_Location)%>%
  summarise(count=sum(Units))%>%
  
ggplot(aes(
  x=DayName,
  y=count,
  fill=Product_Category))+
geom_col()+
facet_wrap(~Store_Location)+
theme_bw()+
theme(legend.position = "bottom",
      legend.title=element_blank())+
labs(title = "",
     x = "Day Name",
     y = "Total Unit")
## `summarise()` has grouped output by 'DayName', 'Product_Category'. You can override using the `.groups` argument.

#Daily sales time series

sales_daily<-Sales%>%
  mutate(Date=floor_date(Date, unit="day"))%>% #Date is already daily, so this is effectively a no-op guard
  group_by(Date)%>%
  summarise(Total.Price=sum(Total_Price))
sales_daily%>%plot_time_series(Date,Total.Price,
                               .title = "Daily Sales January 2017 - September 2018")

#Check for anomalies and clean them

sales_daily%>%
  plot_anomaly_diagnostics(Date, Total.Price)
sales_daily2<-sales_daily%>%
  time_decompose(Total.Price)%>%
  anomalize(remainder)%>%
  clean_anomalies()%>%
  select(Date, observed_cleaned)%>%
  rename(Total.Price=observed_cleaned)
sales_daily2%>%plot_time_series(Date, Total.Price,
                               .title = "Daily Sales January 2017 - September 2018")

#Check for stationarity

library(tseries)
sales_daily_test<-ts(sales_daily2$Total.Price, start = c(2017, 1), frequency = 365.25)

Test for stationarity with the ADF (Augmented Dickey-Fuller) and PP (Phillips-Perron) tests:

adf.test(sales_daily_test)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  sales_daily_test
## Dickey-Fuller = -4.1858, Lag order = 8, p-value = 0.01
## alternative hypothesis: stationary
pp.test(sales_daily_test)
## 
##  Phillips-Perron Unit Root Test
## 
## data:  sales_daily_test
## Dickey-Fuller Z(alpha) = -214.28, Truncation lag parameter = 6, p-value
## = 0.01
## alternative hypothesis: stationary

Since the p-values from both the ADF and PP tests are below 0.05, we reject the null hypothesis of a unit root: the series is stationary.
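
As a cross-check, tseries also provides the KPSS test, whose hypotheses are reversed (the null is stationarity), so a p-value above 0.05 would agree with the ADF and PP results (a sketch):

kpss.test(sales_daily_test)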

sales_daily2%>%
  plot_seasonal_diagnostics(Date, Total.Price,
                            .feature_set = c("wday.lbl","month.lbl"))

#Now let's split the data into training and testing sets (80% train, 20% test)

splits<-initial_time_split(sales_daily2, prop = 0.8)

#you can also use this:
#splits<-time_series_split(sales_daily2,
#                          assess = "4 months",
#                          cumulative = TRUE) #use all of the prior data as the training set
  
splits%>%
  tk_time_series_cv_plan()%>%
  plot_time_series_cv_plan(Date,Total.Price,
                           .title = "Data Split into Training and Testing")

#Seasonal naive (SNAIVE)

model_snaive<-naive_reg()%>%
  set_engine("snaive")%>%
  fit(Total.Price~Date, training(splits))
model_snaive
## parsnip model object
## 
## Fit time:  90ms 
## SNAIVE [7]
## --------
## Model: 
## # A tibble: 7 x 2
##   Date        value
##   <date>      <dbl>
## 1 2018-05-19 36488.
## 2 2018-05-20 41634.
## 3 2018-05-21 20881.
## 4 2018-05-22 19508.
## 5 2018-05-23 20939.
## 6 2018-05-24 19333.
## 7 2018-05-25 30387.

#ETS

model_ets<-exp_smoothing()%>%
  set_engine("ets")%>%
  fit(Total.Price~Date, training(splits))
model_ets
## parsnip model object
## 
## Fit time:  2s 
## ETS(M,N,M) 
## 
## Call:
##  forecast::ets(y = outcome, model = model_ets, damped = damping_ets,  
## 
##  Call:
##      alpha = alpha, beta = beta, gamma = gamma) 
## 
##   Smoothing parameters:
##     alpha = 0.1706 
##     gamma = 1e-04 
## 
##   Initial states:
##     l = 18177.9105 
##     s = 1.3066 1.0586 0.8522 0.8276 0.7561 0.8397
##            1.3593
## 
##   sigma:  0.1629
## 
##      AIC     AICc      BIC 
## 11486.94 11487.38 11529.28

#ARIMA

model_arima<-arima_reg()%>%
  set_engine("auto_arima")%>%
  fit(Total.Price~Date, training(splits))
model_arima
## parsnip model object
## 
## Fit time:  1.6s 
## Series: outcome 
## ARIMA(1,0,1)(1,1,1)[7] 
## 
## Coefficients:
##          ar1      ma1     sar1     sma1
##       0.9492  -0.7739  -0.1426  -0.8474
## s.e.  0.0294   0.0600   0.0513   0.0344
## 
## sigma^2 estimated as 12476397:  log likelihood=-4825.85
## AIC=9661.69   AICc=9661.82   BIC=9682.8

#Prophet

model_prophet<-prophet_reg(
  seasonality_yearly  = TRUE)%>%
  set_engine("prophet")%>%
  fit(Total.Price~Date, training(splits))
model_prophet
## parsnip model object
## 
## Fit time:  4.2s 
## PROPHET Model
## - growth: 'linear'
## - n.changepoints: 25
## - changepoint.range: 0.8
## - yearly.seasonality: 'TRUE'
## - weekly.seasonality: 'auto'
## - daily.seasonality: 'auto'
## - seasonality.mode: 'additive'
## - changepoint.prior.scale: 0.05
## - seasonality.prior.scale: 10
## - holidays.prior.scale: 10
## - logistic_cap: NULL
## - logistic_floor: NULL
## - extra_regressors: 0

For an explanation of these models, see the modeltime book.

#Modeltime table

model_tbl<-modeltime_table(
  model_snaive,
  model_ets,
  model_arima,
  model_prophet)
model_tbl
## # Modeltime Table
## # A tibble: 4 x 3
##   .model_id .model   .model_desc           
##       <int> <list>   <chr>                 
## 1         1 <fit[+]> SNAIVE [7]            
## 2         2 <fit[+]> ETS(M,N,M)            
## 3         3 <fit[+]> ARIMA(1,0,1)(1,1,1)[7]
## 4         4 <fit[+]> PROPHET

#calibrate

calibrate_tbl<-model_tbl%>%
  modeltime_calibrate(testing(splits))
calibrate_tbl
## # Modeltime Table
## # A tibble: 4 x 5
##   .model_id .model   .model_desc            .type .calibration_data 
##       <int> <list>   <chr>                  <chr> <list>            
## 1         1 <fit[+]> SNAIVE [7]             Test  <tibble [128 x 4]>
## 2         2 <fit[+]> ETS(M,N,M)             Test  <tibble [128 x 4]>
## 3         3 <fit[+]> ARIMA(1,0,1)(1,1,1)[7] Test  <tibble [128 x 4]>
## 4         4 <fit[+]> PROPHET                Test  <tibble [128 x 4]>

Calibration is how confidence intervals and accuracy metrics are determined. Calibration data is simply the forecast predictions and residuals calculated from out-of-sample data. After calibrating, the calibration data follows the data through the forecasting workflow.
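
To see what that calibration data actually holds, the nested tibbles can be pulled out directly (a sketch; each one stores the date index alongside .actual, .prediction, and .residuals):

calibrate_tbl$.calibration_data[[4]]%>%head() #out-of-sample residuals for the Prophet model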

#accuracy

calibrate_tbl%>%modeltime_accuracy()
## # A tibble: 4 x 9
##   .model_id .model_desc            .type   mae  mape  mase smape  rmse   rsq
##       <int> <chr>                  <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1         1 SNAIVE [7]             Test  4348.  19.8 0.778  17.6 5522. 0.700
## 2         2 ETS(M,N,M)             Test  3870.  19.3 0.692  16.8 4689. 0.721
## 3         3 ARIMA(1,0,1)(1,1,1)[7] Test  3965.  19.2 0.709  16.8 4917. 0.712
## 4         4 PROPHET                Test  3172.  15.1 0.567  14.0 3953. 0.733
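
Prophet comes out best on every metric here (lowest mae, mape, mase, smape, and rmse; highest rsq). Sorting the table, or rendering it interactively, makes the ranking explicit (a sketch):

calibrate_tbl%>%
  modeltime_accuracy()%>%
  arrange(mae)
#or as an interactive table:
#calibrate_tbl%>%modeltime_accuracy()%>%table_modeltime_accuracy()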

#Test Set Visualization

calibrate_tbl%>%
  modeltime_forecast(
    new_data = testing(splits),
    actual_data = sales_daily2)%>%
  plot_modeltime_forecast()%>%
  plotly::layout(
    legend=list(
      orientation="h",
      xanchor="center",
      x=0.5,
      y=-0.2))

#Forecast the next 3 months

future_forecast_tbl<-calibrate_tbl%>%
  modeltime_refit(sales_daily2)%>% #retrain on full data
  modeltime_forecast(
    h = "3 months",
    actual_data = sales_daily2)

future_forecast_tbl%>%
  plot_modeltime_forecast()%>%
  plotly::layout(
    legend=list(
      orientation="h",
      xanchor="center",
      x=0.5,
      y=-0.2))

#Forecast with a tuned Prophet model

model_prophet2<-prophet_reg(
  growth = "linear", #trend type
  prior_scale_changepoints = 0.4, #flexibility of the automatic changepoint selection
  prior_scale_seasonality = 5, #strength of the seasonality model
  seasonality_yearly  = TRUE,
  seasonality_weekly = TRUE,
  seasonality_daily = FALSE)%>%
  set_engine("prophet")%>%
  fit(Total.Price~Date, training(splits))
model_prophet2
## parsnip model object
## 
## Fit time:  350ms 
## PROPHET Model
## - growth: 'linear'
## - n.changepoints: 25
## - changepoint.range: 0.8
## - yearly.seasonality: 'TRUE'
## - weekly.seasonality: 'TRUE'
## - daily.seasonality: 'FALSE'
## - seasonality.mode: 'additive'
## - changepoint.prior.scale: 0.4
## - seasonality.prior.scale: 5
## - holidays.prior.scale: 10
## - logistic_cap: NULL
## - logistic_floor: NULL
## - extra_regressors: 0
model_tbl2<-modeltime_table(
  model_prophet,
  model_prophet2)

calibrate_tbl2<-model_tbl2%>%
  modeltime_calibrate(testing(splits))

calibrate_tbl2%>%modeltime_accuracy()
## # A tibble: 2 x 9
##   .model_id .model_desc .type   mae  mape  mase smape  rmse   rsq
##       <int> <chr>       <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1         1 PROPHET     Test  3172.  15.1 0.567  14.0 3953. 0.733
## 2         2 PROPHET     Test  3185.  14.0 0.570  13.9 3957. 0.737
calibrate_tbl2%>%
  modeltime_forecast(
    new_data = testing(splits),
    actual_data = sales_daily2)%>%
  plot_modeltime_forecast(.title = "Test Plot")%>%
  plotly::layout(
    legend=list(
      orientation="h",
      xanchor="center",
      x=0.5,
      y=-0.2))
future_forecast_tbl2<-calibrate_tbl2%>%
  modeltime_refit(sales_daily2)%>% #retrain on full data
  modeltime_forecast(
    h = "3 months",
    actual_data = sales_daily2)

future_forecast_tbl2%>%
  plot_modeltime_forecast(.title = "Forecast 3 Months")%>%
  plotly::layout(
    legend=list(
      orientation="h",
      xanchor="center",
      x=0.5,
      y=-0.2))
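
Finally, the table returned by modeltime_forecast() is an ordinary tibble (with .model_desc, .key, .index, .value, and confidence-interval columns), so exporting the predictions is one write_csv() away (a sketch; the file name is hypothetical):

future_forecast_tbl2%>%
  filter(.key=="prediction")%>%
  select(.model_desc,.index,.value,.conf_lo,.conf_hi)%>%
  write_csv("sales_forecast_3_months.csv")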